// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 * Copyright (C) 2003-2016 International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
package com.ibm.icu.dev.test.rbbi;

import com.ibm.icu.dev.test.CoreTestFmwk;
import com.ibm.icu.dev.test.rbbi.SegmentationRule.BreakContext;
import com.ibm.icu.dev.test.rbbi.SegmentationRule.Resolution;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * Monkey testing of RuleBasedBreakIterator. The old monkey test, now using regexes generated by the
 * Unicode tools. The new monkey test is class RBBIMonkeyTest.
 *
 * <p>Monkey tests for RBBI. These tests have independent implementations of the Unicode TR boundary
 * rules, and compare results between these and ICU's implementation, using random data.
 *
 * <p>Tests cover Grapheme Cluster (char), Word and Line breaks.
 *
 * <p>Ported from ICU4C, original code in file source/test/intltest/rbbitst.cpp
 */
@RunWith(JUnit4.class)
public class RBBITestMonkey extends CoreTestFmwk {
    /**
     * Abstract base for the monkey test's independent implementations of break iteration.
     *
     * <p>Each concrete subclass implements the break rules for one iterator type by filling in
     * {@code rules}, {@code sets} and {@code classNames}. The monkey test itself doesn't know which
     * type of break iterator it is testing; it works purely in terms of the interface defined
     * here.
     */
    abstract static class RBBIMonkeyKind {
        /**
         * A character property, one of the constants defined in class UProperty. The value of this
         * property will be displayed for the characters near any test failure.
         */
        int fCharProperty;

        /** Character classes used for this type of iterator; see {@link #charClasses()}. */
        List<UnicodeSet> sets;

        /** Name of each character class, parallel with {@link #sets}. */
        ArrayList<String> classNames;

        /**
         * Rules applied, in list order, by {@link #setText}. The base constructor leaves this
         * unset; each subclass assigns it.
         */
        List<SegmentationRule> rules;

        /** Set returned by {@link #getDictionarySet()}; empty unless a subclass replaces it. */
        UnicodeSet dictionarySet;

        /** Name of the rule applied at each index of the test text, including the end index. */
        private ArrayList<String> fAppliedRules;

        /** The text most recently handed to {@link #setText}. */
        private StringBuffer text;

        /** Per-index resolution state computed by {@link #setText}. */
        private BreakContext[] resolved;

        /** Starts with no character classes, no applied rules and an empty dictionary set. */
        RBBIMonkeyKind() {
            dictionarySet = new UnicodeSet();
            fAppliedRules = new ArrayList<>();
            classNames = new ArrayList<>();
            sets = new ArrayList<>();
        }

        /**
         * @return the list of UnicodeSets representing the character classes used for this type of
         *     iterator
         */
        List<UnicodeSet> charClasses() {
            return sets;
        }

        /**
         * Sets the test text on which subsequent calls to {@link #next} will operate, and runs
         * every rule over it.
         *
         * @param s the test text
         * @throws IllegalArgumentException if, after all rules have run, some index that is not in
         *     the middle of a surrogate pair has no applied rule
         */
        void setText(StringBuffer s) {
            text = s;
            final int length = s.length();
            prepareAppliedRules(length);

            // One context per index, from 0 through the end-of-text index.
            resolved = new BreakContext[length + 1];
            for (int position = 0; position < resolved.length; ++position) {
                resolved[position] = new BreakContext(position);
            }

            // The rules work on a separate copy of the text, not on `s` itself.
            final StringBuilder remapped = new StringBuilder(s.toString());
            for (final SegmentationRule rule : rules) {
                rule.apply(remapped, resolved);
            }

            for (int position = 0; position < resolved.length; ++position) {
                final boolean insideSurrogatePair =
                        position > 0
                                && position < length
                                && UTF16.isLeadSurrogate(s.charAt(position - 1))
                                && UTF16.isTrailSurrogate(s.charAt(position));
                if (insideSurrogatePair) {
                    // Indices between a lead and a trail surrogate need not be resolved.
                    continue;
                }
                final BreakContext context = resolved[position];
                if (context.appliedRule == null) {
                    throw new IllegalArgumentException("Failed to resolve at " + position);
                }
                setAppliedRule(position, context.appliedRule.name());
            }
        }

        /**
         * Finds the next break position strictly after the specified position.
         *
         * @param startPos index to start searching after
         * @return the next index whose applied rule resolves to a break, or -1 after reaching the
         *     end of the text
         */
        int next(int startPos) {
            int candidate = startPos + 1;
            while (candidate < resolved.length) {
                final BreakContext context = resolved[candidate];
                if (context.appliedRule != null
                        && context.appliedRule.resolution() == Resolution.BREAK) {
                    return candidate;
                }
                ++candidate;
            }
            return -1;
        }

        /**
         * @return the name of each character class, parallel with {@link #charClasses()}; used for
         *     debugging output of characters
         */
        List<String> characterClassNames() {
            return classNames;
        }

        /** @return the dictionary set of this kind */
        UnicodeSet getDictionarySet() {
            return dictionarySet;
        }

        /** Records {@code value} as the name of the rule applied at {@code position}. */
        void setAppliedRule(int position, String value) {
            fAppliedRules.set(position, value);
        }

        /** @return the name of the rule recorded for {@code position} */
        String getAppliedRule(int position) {
            return fAppliedRules.get(position);
        }

        /**
         * Looks up the name of the first character class containing the code point.
         *
         * @param c the code point to classify
         * @return the class name, or {@code "bad class name"} if no class contains {@code c}
         */
        String classNameFromCodepoint(int c) {
            // Simply walk the character classes in order until one contains the code point.
            final List<UnicodeSet> classes = charClasses();
            for (int index = 0; index < classes.size(); index++) {
                if (classes.get(index).contains(c)) {
                    return classNames.get(index);
                }
            }
            return "bad class name";
        }

        /** @return the length of the longest class name, or 0 if there are no classes */
        int maxClassNameSize() {
            final int classCount = charClasses().size();
            int longest = 0;
            for (int index = 0; index < classCount; index++) {
                longest = Math.max(longest, classNames.get(index).length());
            }
            return longest;
        }

        /**
         * Clears the applied-rule list and refills it with {@code size + 1} empty strings, one for
         * every index of a test text of length {@code size}.
         */
        void prepareAppliedRules(int size) {
            final int entries = size + 1;
            // Drop whatever was recorded for the previous text.
            fAppliedRules.clear();
            fAppliedRules.ensureCapacity(entries);
            for (int count = 0; count < entries; ++count) {
                fAppliedRules.add("");
            }
        }

        /** A UnicodeSet paired with the name of the character class it represents. */
        static class NamedSet {
            String name;
            UnicodeSet set;

            NamedSet(String name, UnicodeSet set) {
                this.name = name;
                this.set = set;
            }

            /** Convenience overload that builds the set from a UnicodeSet pattern. */
            NamedSet(String name, String pattern) {
                this(name, new UnicodeSet(pattern));
            }
        }
    }

    /**
     * Monkey test subclass for testing Character (Grapheme Cluster) boundaries.
     *
     * <p>The constructor registers the rules in this order: the GB1/GB2 start- and end-of-text
     * rules, the block generated by the Unicode tools, and finally a break rule whose context
     * patterns are both empty (presumably the catch-all default; confirm against RegexRule). It
     * also builds the named partition of character classes and copies it into the parallel {@code
     * sets} and {@code classNames} lists.
     */
    static class RBBICharMonkey extends RBBIMonkeyKind {
        RBBICharMonkey() {
            // Property whose value is displayed for the characters near a test failure.
            fCharProperty = UProperty.GRAPHEME_CLUSTER_BREAK;
            List<NamedSet> partition = new ArrayList<>();
            rules = new ArrayList<>();

            // These two could be part of the rules.
            rules.add(new RegexRule("GB1 sot ÷ Any", "^", Resolution.BREAK, ""));
            // Note that /$/ matches ( BK | CR | LF | NL ) eot, so we use (?!.) instead.
            // The generated rules use the same (?!.).
            rules.add(new RegexRule("GB2 Any ÷ eot", "", Resolution.BREAK, "(?!.)"));

            // --- NOLI ME TANGERE ---
            // Generated by GenerateBreakTest.java in the Unicode tools.
            partition.add(new NamedSet("CR", new UnicodeSet("[\\p{Grapheme_Cluster_Break=CR}]")));
            partition.add(new NamedSet("LF", new UnicodeSet("[\\p{Grapheme_Cluster_Break=LF}]")));
            partition.add(
                    new NamedSet(
                            "Control", new UnicodeSet("[\\p{Grapheme_Cluster_Break=Control}]")));
            partition.add(
                    new NamedSet(
                            "Extend_ConjunctLinker",
                            new UnicodeSet(
                                    "[\\p{Grapheme_Cluster_Break=Extend}&\\p{Indic_Conjunct_Break=Linker}]")));
            partition.add(
                    new NamedSet(
                            "Extend_ConjunctExtendermConjunctLinker",
                            new UnicodeSet(
                                    "[\\p{Grapheme_Cluster_Break=Extend}&[\\p{Indic_Conjunct_Break=Linker}\\p{Indic_Conjunct_Break=Extend}]-\\p{Indic_Conjunct_Break=Linker}]")));
            partition.add(
                    new NamedSet(
                            "ExtendmConjunctLinkermConjunctExtender",
                            new UnicodeSet(
                                    "[\\p{Grapheme_Cluster_Break=Extend}-\\p{Indic_Conjunct_Break=Linker}-[\\p{Indic_Conjunct_Break=Linker}\\p{Indic_Conjunct_Break=Extend}]]")));
            partition.add(new NamedSet("ZWJ", new UnicodeSet("[\\p{Grapheme_Cluster_Break=ZWJ}]")));
            partition.add(
                    new NamedSet(
                            "RI",
                            new UnicodeSet("[\\p{Grapheme_Cluster_Break=Regional_Indicator}]")));
            partition.add(
                    new NamedSet(
                            "Prepend", new UnicodeSet("[\\p{Grapheme_Cluster_Break=Prepend}]")));
            partition.add(
                    new NamedSet(
                            "SpacingMark",
                            new UnicodeSet("[\\p{Grapheme_Cluster_Break=SpacingMark}]")));
            partition.add(new NamedSet("L", new UnicodeSet("[\\p{Grapheme_Cluster_Break=L}]")));
            partition.add(new NamedSet("V", new UnicodeSet("[\\p{Grapheme_Cluster_Break=V}]")));
            partition.add(new NamedSet("T", new UnicodeSet("[\\p{Grapheme_Cluster_Break=T}]")));
            partition.add(new NamedSet("LV", new UnicodeSet("[\\p{Grapheme_Cluster_Break=LV}]")));
            partition.add(new NamedSet("LVT", new UnicodeSet("[\\p{Grapheme_Cluster_Break=LVT}]")));
            partition.add(
                    new NamedSet(
                            "LinkingConsonant",
                            new UnicodeSet("[\\p{Indic_Conjunct_Break=Consonant}]")));
            partition.add(new NamedSet("ExtPict", new UnicodeSet("[\\p{Extended_Pictographic}]")));
            partition.add(
                    new NamedSet(
                            "XXmLinkingConsonantmExtPict",
                            new UnicodeSet(
                                    "[\\p{Grapheme_Cluster_Break=Other}-\\p{Indic_Conjunct_Break=Consonant}-\\p{Extended_Pictographic}]")));

            rules.add(
                    new RegexRule(
                            "$CR × $LF",
                            "\\p{Grapheme_Cluster_Break=CR}",
                            Resolution.NO_BREAK,
                            "\\p{Grapheme_Cluster_Break=LF}"));
            rules.add(
                    new RegexRule(
                            "( $Control | $CR | $LF ) ÷",
                            "( \\p{Grapheme_Cluster_Break=Control} | \\p{Grapheme_Cluster_Break=CR} | \\p{Grapheme_Cluster_Break=LF} )",
                            Resolution.BREAK,
                            ""));
            rules.add(
                    new RegexRule(
                            "÷ ( $Control | $CR | $LF )",
                            "",
                            Resolution.BREAK,
                            "( \\p{Grapheme_Cluster_Break=Control} | \\p{Grapheme_Cluster_Break=CR} | \\p{Grapheme_Cluster_Break=LF} )"));
            rules.add(
                    new RegexRule(
                            "$L × ( $L | $V | $LV | $LVT )",
                            "\\p{Grapheme_Cluster_Break=L}",
                            Resolution.NO_BREAK,
                            "( \\p{Grapheme_Cluster_Break=L} | \\p{Grapheme_Cluster_Break=V} | \\p{Grapheme_Cluster_Break=LV} | \\p{Grapheme_Cluster_Break=LVT} )"));
            rules.add(
                    new RegexRule(
                            "( $LV | $V ) × ( $V | $T )",
                            "( \\p{Grapheme_Cluster_Break=LV} | \\p{Grapheme_Cluster_Break=V} )",
                            Resolution.NO_BREAK,
                            "( \\p{Grapheme_Cluster_Break=V} | \\p{Grapheme_Cluster_Break=T} )"));
            rules.add(
                    new RegexRule(
                            "( $LVT | $T) × $T",
                            "( \\p{Grapheme_Cluster_Break=LVT} | \\p{Grapheme_Cluster_Break=T})",
                            Resolution.NO_BREAK,
                            "\\p{Grapheme_Cluster_Break=T}"));
            rules.add(
                    new RegexRule(
                            "× ($Extend | $ZWJ)",
                            "",
                            Resolution.NO_BREAK,
                            "(\\p{Grapheme_Cluster_Break=Extend} | \\p{Grapheme_Cluster_Break=ZWJ})"));
            rules.add(
                    new RegexRule(
                            "× $SpacingMark",
                            "",
                            Resolution.NO_BREAK,
                            "\\p{Grapheme_Cluster_Break=SpacingMark}"));
            rules.add(
                    new RegexRule(
                            "$Prepend ×",
                            "\\p{Grapheme_Cluster_Break=Prepend}",
                            Resolution.NO_BREAK,
                            ""));
            rules.add(
                    new RegexRule(
                            "$LinkingConsonant $ConjunctExtender* $ConjunctLinker $ConjunctExtender* × $LinkingConsonant",
                            "\\p{Indic_Conjunct_Break=Consonant} [\\p{Indic_Conjunct_Break=Linker}\\p{Indic_Conjunct_Break=Extend}]* \\p{Indic_Conjunct_Break=Linker} [\\p{Indic_Conjunct_Break=Linker}\\p{Indic_Conjunct_Break=Extend}]*",
                            Resolution.NO_BREAK,
                            "\\p{Indic_Conjunct_Break=Consonant}"));
            rules.add(
                    new RegexRule(
                            "$ExtPict $Extend* $ZWJ × $ExtPict",
                            "\\p{Extended_Pictographic} \\p{Grapheme_Cluster_Break=Extend}* \\p{Grapheme_Cluster_Break=ZWJ}",
                            Resolution.NO_BREAK,
                            "\\p{Extended_Pictographic}"));
            rules.add(
                    new RegexRule(
                            "^ ($RI $RI)* $RI × $RI",
                            "^ (\\p{Grapheme_Cluster_Break=Regional_Indicator} \\p{Grapheme_Cluster_Break=Regional_Indicator})* \\p{Grapheme_Cluster_Break=Regional_Indicator}",
                            Resolution.NO_BREAK,
                            "\\p{Grapheme_Cluster_Break=Regional_Indicator}"));
            rules.add(
                    new RegexRule(
                            "[^$RI] ($RI $RI)* $RI × $RI",
                            "[^\\p{Grapheme_Cluster_Break=Regional_Indicator}] (\\p{Grapheme_Cluster_Break=Regional_Indicator} \\p{Grapheme_Cluster_Break=Regional_Indicator})* \\p{Grapheme_Cluster_Break=Regional_Indicator}",
                            Resolution.NO_BREAK,
                            "\\p{Grapheme_Cluster_Break=Regional_Indicator}"));
            // --- End of generated code. ---

            // TODO(egg): This could just as well be part of the rules…
            rules.add(new RegexRule("(ALL ÷ / ÷ ALL)", "", Resolution.BREAK, ""));

            // Copy the partition into the parallel lists behind charClasses() and
            // characterClassNames().
            for (final NamedSet part : partition) {
                sets.add(part.set);
                classNames.add(part.name);
            }
        }
    }

    /**
     * Word Monkey Test Class: monkey test subclass for testing Word boundaries.
     *
     * <p>The constructor replaces the inherited dictionary set with the union of the range
     * U+AC00..U+D7A3, Han, Hiragana, Word_Break=Katakana and LineBreak=Complex_Context. It then
     * registers the rules in this order: the WB1/WB2 start- and end-of-text rules, the block
     * generated by the Unicode tools, and finally a break rule whose context patterns are both
     * empty (presumably the catch-all default; confirm against RegexRule). The named partition of
     * character classes is copied into the parallel {@code sets} and {@code classNames} lists.
     */
    static class RBBIWordMonkey extends RBBIMonkeyKind {
        RBBIWordMonkey() {
            // Property whose value is displayed for the characters near a test failure.
            fCharProperty = UProperty.WORD_BREAK;
            // Built in three steps: a base pattern, then two sets added by property value.
            dictionarySet = new UnicodeSet("[[\uac00-\ud7a3][:Han:][:Hiragana:]]");
            dictionarySet.addAll(new UnicodeSet("[\\p{Word_Break = Katakana}]"));
            dictionarySet.addAll(new UnicodeSet("[\\p{LineBreak = Complex_Context}]"));
            List<NamedSet> partition = new ArrayList<>();
            rules = new ArrayList<>();

            // These two could be part of the rules.
            rules.add(new RegexRule("WB1 sot ÷ Any", "^", Resolution.BREAK, ""));
            // Note that /$/ matches ( BK | CR | LF | NL ) eot, so we use (?!.) instead.
            // The generated rules use the same (?!.).
            rules.add(new RegexRule("WB2 Any ÷ eot", "", Resolution.BREAK, "(?!.)"));

            // --- NOLI ME TANGERE ---
            // Generated by GenerateBreakTest.java in the Unicode tools.
            partition.add(new NamedSet("CR", new UnicodeSet("[\\p{Word_Break=CR}]")));
            partition.add(new NamedSet("LF", new UnicodeSet("[\\p{Word_Break=LF}]")));
            partition.add(new NamedSet("Newline", new UnicodeSet("[\\p{Word_Break=Newline}]")));
            partition.add(new NamedSet("Extend", new UnicodeSet("[\\p{Word_Break=Extend}]")));
            partition.add(new NamedSet("Format", new UnicodeSet("[[\\p{Word_Break=Format}]]")));
            partition.add(new NamedSet("Katakana", new UnicodeSet("[\\p{Word_Break=Katakana}]")));
            partition.add(
                    new NamedSet(
                            "ALetter_ExtPict",
                            new UnicodeSet(
                                    "[\\p{Word_Break=ALetter}&\\p{Extended_Pictographic}]")));
            partition.add(
                    new NamedSet(
                            "ALettermExtPict",
                            new UnicodeSet(
                                    "[\\p{Word_Break=ALetter}-\\p{Extended_Pictographic}]")));
            partition.add(new NamedSet("MidLetter", new UnicodeSet("[\\p{Word_Break=MidLetter}]")));
            partition.add(new NamedSet("MidNum", new UnicodeSet("[\\p{Word_Break=MidNum}]")));
            partition.add(new NamedSet("MidNumLet", new UnicodeSet("[\\p{Word_Break=MidNumLet}]")));
            partition.add(new NamedSet("Numeric", new UnicodeSet("[\\p{Word_Break=Numeric}]")));
            partition.add(
                    new NamedSet("ExtendNumLet", new UnicodeSet("[\\p{Word_Break=ExtendNumLet}]")));
            partition.add(
                    new NamedSet("RI", new UnicodeSet("[\\p{Word_Break=Regional_Indicator}]")));
            partition.add(
                    new NamedSet(
                            "Hebrew_Letter", new UnicodeSet("[\\p{Word_Break=Hebrew_Letter}]")));
            partition.add(
                    new NamedSet("Double_Quote", new UnicodeSet("[\\p{Word_Break=Double_Quote}]")));
            partition.add(
                    new NamedSet("Single_Quote", new UnicodeSet("[\\p{Word_Break=Single_Quote}]")));
            partition.add(new NamedSet("ZWJ", new UnicodeSet("[\\p{Word_Break=ZWJ}]")));
            partition.add(
                    new NamedSet(
                            "ExtPictmALetter",
                            new UnicodeSet(
                                    "[\\p{Extended_Pictographic}-\\p{Word_Break=ALetter}]")));
            partition.add(new NamedSet("WSegSpace", new UnicodeSet("[\\p{Word_Break=WSegSpace}]")));
            partition.add(
                    new NamedSet(
                            "XXmExtPict",
                            new UnicodeSet("[\\p{Word_Break=Other}-\\p{Extended_Pictographic}]")));

            rules.add(
                    new RegexRule(
                            "$CR × $LF",
                            "\\p{Word_Break=CR}",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=LF}"));
            rules.add(
                    new RegexRule(
                            "($Newline | $CR | $LF) ÷",
                            "(\\p{Word_Break=Newline} | \\p{Word_Break=CR} | \\p{Word_Break=LF})",
                            Resolution.BREAK,
                            ""));
            rules.add(
                    new RegexRule(
                            "÷ ($Newline | $CR | $LF)",
                            "",
                            Resolution.BREAK,
                            "(\\p{Word_Break=Newline} | \\p{Word_Break=CR} | \\p{Word_Break=LF})"));
            rules.add(
                    new RegexRule(
                            "$ZWJ × $ExtPict",
                            "\\p{Word_Break=ZWJ}",
                            Resolution.NO_BREAK,
                            "\\p{Extended_Pictographic}"));
            rules.add(
                    new RegexRule(
                            "$WSegSpace × $WSegSpace",
                            "\\p{Word_Break=WSegSpace}",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=WSegSpace}"));
            rules.add(
                    new RemapRule(
                            "(?<X>[^$CR $LF $Newline]) ($Extend | $Format | $ZWJ)* → ${X}",
                            "(?<X>[^\\p{Word_Break=CR} \\p{Word_Break=LF} \\p{Word_Break=Newline}]) (\\p{Word_Break=Extend} | [\\p{Word_Break=Format}] | \\p{Word_Break=ZWJ})*",
                            "${X}"));
            rules.add(
                    new RegexRule(
                            "$AHLetter × $AHLetter",
                            "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]",
                            Resolution.NO_BREAK,
                            "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]"));
            rules.add(
                    new RegexRule(
                            "$AHLetter × ($MidLetter | $MidNumLetQ) $AHLetter",
                            "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]",
                            Resolution.NO_BREAK,
                            "(\\p{Word_Break=MidLetter} | [\\p{Word_Break=MidNumLet} \\p{Word_Break=Single_Quote}]) [\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]"));
            rules.add(
                    new RegexRule(
                            "$AHLetter ($MidLetter | $MidNumLetQ) × $AHLetter",
                            "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}] (\\p{Word_Break=MidLetter} | [\\p{Word_Break=MidNumLet} \\p{Word_Break=Single_Quote}])",
                            Resolution.NO_BREAK,
                            "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]"));
            rules.add(
                    new RegexRule(
                            "$Hebrew_Letter × $Single_Quote",
                            "\\p{Word_Break=Hebrew_Letter}",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=Single_Quote}"));
            rules.add(
                    new RegexRule(
                            "$Hebrew_Letter × $Double_Quote $Hebrew_Letter",
                            "\\p{Word_Break=Hebrew_Letter}",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=Double_Quote} \\p{Word_Break=Hebrew_Letter}"));
            rules.add(
                    new RegexRule(
                            "$Hebrew_Letter $Double_Quote × $Hebrew_Letter",
                            "\\p{Word_Break=Hebrew_Letter} \\p{Word_Break=Double_Quote}",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=Hebrew_Letter}"));
            rules.add(
                    new RegexRule(
                            "$Numeric × $Numeric",
                            "\\p{Word_Break=Numeric}",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$AHLetter × $Numeric",
                            "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$Numeric × $AHLetter",
                            "\\p{Word_Break=Numeric}",
                            Resolution.NO_BREAK,
                            "[\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}]"));
            rules.add(
                    new RegexRule(
                            "$Numeric ($MidNum | $MidNumLetQ) × $Numeric",
                            "\\p{Word_Break=Numeric} (\\p{Word_Break=MidNum} | [\\p{Word_Break=MidNumLet} \\p{Word_Break=Single_Quote}])",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$Numeric × ($MidNum | $MidNumLetQ) $Numeric",
                            "\\p{Word_Break=Numeric}",
                            Resolution.NO_BREAK,
                            "(\\p{Word_Break=MidNum} | [\\p{Word_Break=MidNumLet} \\p{Word_Break=Single_Quote}]) \\p{Word_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$Katakana × $Katakana",
                            "\\p{Word_Break=Katakana}",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=Katakana}"));
            rules.add(
                    new RegexRule(
                            "($AHLetter | $Numeric | $Katakana | $ExtendNumLet) × $ExtendNumLet",
                            "([\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}] | \\p{Word_Break=Numeric} | \\p{Word_Break=Katakana} | \\p{Word_Break=ExtendNumLet})",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=ExtendNumLet}"));
            rules.add(
                    new RegexRule(
                            "$ExtendNumLet × ($AHLetter | $Numeric | $Katakana)",
                            "\\p{Word_Break=ExtendNumLet}",
                            Resolution.NO_BREAK,
                            "([\\p{Word_Break=ALetter} \\p{Word_Break=Hebrew_Letter}] | \\p{Word_Break=Numeric} | \\p{Word_Break=Katakana})"));
            rules.add(
                    new RegexRule(
                            "^ ($RI $RI)* $RI × $RI",
                            "^ (\\p{Word_Break=Regional_Indicator} \\p{Word_Break=Regional_Indicator})* \\p{Word_Break=Regional_Indicator}",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=Regional_Indicator}"));
            rules.add(
                    new RegexRule(
                            "[^$RI] ($RI $RI)* $RI × $RI",
                            "[^\\p{Word_Break=Regional_Indicator}] (\\p{Word_Break=Regional_Indicator} \\p{Word_Break=Regional_Indicator})* \\p{Word_Break=Regional_Indicator}",
                            Resolution.NO_BREAK,
                            "\\p{Word_Break=Regional_Indicator}"));
            // --- End of generated code. ---

            // TODO(egg): This could just as well be part of the rules…
            rules.add(new RegexRule("(ALL ÷ / ÷ ALL)", "", Resolution.BREAK, ""));

            // Copy the partition into the parallel lists behind charClasses() and
            // characterClassNames().
            for (final NamedSet part : partition) {
                sets.add(part.set);
                classNames.add(part.name);
            }
        }
    }

    static class RBBILineMonkey extends RBBIMonkeyKind {
        RBBILineMonkey() {
            fCharProperty = UProperty.LINE_BREAK;

            dictionarySet = new UnicodeSet("\\p{lb=SA}");

            List<NamedSet> partition = new ArrayList<>();
            rules = new ArrayList<>();

            rules.add(new RegexRule("sot ÷ contra LB2", "^", Resolution.BREAK, ""));
            // This one could be part of the rules.
            // Note that /$/ matches ( BK | CR | LF | NL ) eot, so we use (?!.) instead.
            // The generated rules use the same (?!.).
            rules.add(new RegexRule("LB3 ÷ eot", "", Resolution.BREAK, "(?!.)"));

            // --- NOLI ME TANGERE ---
            // Generated by GenerateBreakTest.java in the Unicode tools.
            partition.add(
                    new NamedSet(
                            "AI_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Ambiguous}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "AImEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Ambiguous}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("AK", new UnicodeSet("[\\p{Line_Break=Aksara}]")));
            partition.add(
                    new NamedSet(
                            "ALorig_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Alphabetic}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "ALorig_DottedCircle",
                            new UnicodeSet("[\\p{Line_Break=Alphabetic}&[◌]]")));
            partition.add(
                    new NamedSet(
                            "ALorigmEastAsianmDottedCircle",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Alphabetic}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]-[◌]]")));
            partition.add(new NamedSet("AP", new UnicodeSet("[\\p{Line_Break=Aksara_Prebase}]")));
            partition.add(new NamedSet("AS", new UnicodeSet("[\\p{Line_Break=Aksara_Start}]")));
            partition.add(new NamedSet("B2", new UnicodeSet("[\\p{Line_Break=Break_Both}]")));
            partition.add(
                    new NamedSet(
                            "BA_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Break_After}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "BAmEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Break_After}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("BB", new UnicodeSet("[\\p{Line_Break=Break_Before}]")));
            partition.add(new NamedSet("BK", new UnicodeSet("[\\p{Line_Break=Mandatory_Break}]")));
            partition.add(new NamedSet("CB", new UnicodeSet("[\\p{Line_Break=Contingent_Break}]")));
            partition.add(
                    new NamedSet(
                            "CL_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Close_Punctuation}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "CLmEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Close_Punctuation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("CP", new UnicodeSet("[\\p{Line_Break=CP}]")));
            partition.add(
                    new NamedSet(
                            "CMorig_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Combining_Mark}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "CMorigmEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Combining_Mark}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("CR", new UnicodeSet("[\\p{Line_Break=Carriage_Return}]")));
            partition.add(
                    new NamedSet(
                            "EX_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Exclamation}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "EXmEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Exclamation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "GL_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Glue}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "GLmEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Glue}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("H2", new UnicodeSet("[\\p{Line_Break=H2}]")));
            partition.add(new NamedSet("H3", new UnicodeSet("[\\p{Line_Break=H3}]")));
            partition.add(
                    new NamedSet("HH", new UnicodeSet("[\\p{Line_Break=Unambiguous_Hyphen}]")));
            partition.add(new NamedSet("HL", new UnicodeSet("[\\p{Line_Break=HL}]")));
            partition.add(new NamedSet("HY", new UnicodeSet("[\\p{Line_Break=Hyphen}]")));
            partition.add(
                    new NamedSet(
                            "ID_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Ideographic}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "ID_ExtPictUnassigned",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Ideographic}&[\\p{Extended_Pictographic=True}&\\p{gc=Cn}]]")));
            partition.add(
                    new NamedSet(
                            "IDmEastAsianmExtPictUnassigned",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Ideographic}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]-[\\p{Extended_Pictographic=True}&\\p{gc=Cn}]]")));
            partition.add(
                    new NamedSet(
                            "IN_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Inseparable}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "INmEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Inseparable}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("IS", new UnicodeSet("[\\p{Line_Break=Infix_Numeric}]")));
            partition.add(new NamedSet("JL", new UnicodeSet("[\\p{Line_Break=JL}]")));
            partition.add(new NamedSet("JT", new UnicodeSet("[\\p{Line_Break=JT}]")));
            partition.add(new NamedSet("JV", new UnicodeSet("[\\p{Line_Break=JV}]")));
            partition.add(new NamedSet("LF", new UnicodeSet("[\\p{Line_Break=Line_Feed}]")));
            partition.add(new NamedSet("NL", new UnicodeSet("[\\p{Line_Break=Next_Line}]")));
            partition.add(
                    new NamedSet(
                            "NSorig_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Nonstarter}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "NSorigmEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Nonstarter}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("NU", new UnicodeSet("[\\p{Line_Break=Numeric}]")));
            partition.add(
                    new NamedSet(
                            "OP_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Open_Punctuation}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "OPmEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Open_Punctuation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "PO_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Postfix_Numeric}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "POmEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Postfix_Numeric}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "PR_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Prefix_Numeric}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "PRmEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Prefix_Numeric}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "QU_Pi", new UnicodeSet("[\\p{Line_Break=Quotation}&\\p{gc=Pi}]")));
            partition.add(
                    new NamedSet(
                            "QU_Pf", new UnicodeSet("[\\p{Line_Break=Quotation}&\\p{gc=Pf}]")));
            partition.add(
                    new NamedSet(
                            "QUmPimPf",
                            new UnicodeSet("[\\p{Line_Break=Quotation}-\\p{gc=Pi}-\\p{gc=Pf}]")));
            partition.add(
                    new NamedSet(
                            "SA_Mn",
                            new UnicodeSet("[[\\p{Line_Break=Complex_Context}&\\p{gc=Mn}]]")));
            partition.add(
                    new NamedSet(
                            "SA_Mc",
                            new UnicodeSet("[[\\p{Line_Break=Complex_Context}&\\p{gc=Mc}]]")));
            partition.add(
                    new NamedSet(
                            "SAmMnmMc",
                            new UnicodeSet(
                                    "[[\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]]")));
            partition.add(new NamedSet("SG", new UnicodeSet("[\\p{Line_Break=Surrogate}]")));
            partition.add(new NamedSet("SP", new UnicodeSet("[\\p{Line_Break=Space}]")));
            partition.add(new NamedSet("SY", new UnicodeSet("[\\p{Line_Break=Break_Symbols}]")));
            partition.add(new NamedSet("VF", new UnicodeSet("[\\p{Line_Break=Virama_Final}]")));
            partition.add(new NamedSet("VI", new UnicodeSet("[\\p{Line_Break=Virama}]")));
            partition.add(new NamedSet("WJ", new UnicodeSet("[\\p{Line_Break=Word_Joiner}]")));
            partition.add(
                    new NamedSet(
                            "XX_ExtPictUnassigned",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Unknown}&[\\p{Extended_Pictographic=True}&\\p{gc=Cn}]]")));
            partition.add(
                    new NamedSet(
                            "XXmExtPictUnassigned",
                            new UnicodeSet(
                                    "[\\p{Line_Break=Unknown}-[\\p{Extended_Pictographic=True}&\\p{gc=Cn}]]")));
            partition.add(new NamedSet("ZW", new UnicodeSet("[\\p{Line_Break=ZWSpace}]")));
            partition.add(
                    new NamedSet(
                            "CJ",
                            new UnicodeSet("[\\p{Line_Break=Conditional_Japanese_Starter}]")));
            partition.add(
                    new NamedSet("RI", new UnicodeSet("[\\p{Line_Break=Regional_Indicator}]")));
            partition.add(
                    new NamedSet(
                            "EB_EastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=E_Base}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(
                    new NamedSet(
                            "EBmEastAsian",
                            new UnicodeSet(
                                    "[\\p{Line_Break=E_Base}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
            partition.add(new NamedSet("EM", new UnicodeSet("[\\p{Line_Break=E_Modifier}]")));
            partition.add(new NamedSet("ZWJ", new UnicodeSet("[\\p{Line_Break=ZWJ}]")));

            rules.add(
                    new RegexRule(
                            "$BK ÷", "\\p{Line_Break=Mandatory_Break}", Resolution.BREAK, ""));
            rules.add(
                    new RegexRule(
                            "$CR × $LF",
                            "\\p{Line_Break=Carriage_Return}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Line_Feed}"));
            rules.add(
                    new RegexRule(
                            "$CR ÷", "\\p{Line_Break=Carriage_Return}", Resolution.BREAK, ""));
            rules.add(new RegexRule("$LF ÷", "\\p{Line_Break=Line_Feed}", Resolution.BREAK, ""));
            rules.add(new RegexRule("$NL ÷", "\\p{Line_Break=Next_Line}", Resolution.BREAK, ""));
            rules.add(
                    new RegexRule(
                            "× ( $BK | $CR | $LF | $NL )",
                            "",
                            Resolution.NO_BREAK,
                            "( \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} )"));
            rules.add(new RegexRule("× $SP", "", Resolution.NO_BREAK, "\\p{Line_Break=Space}"));
            rules.add(new RegexRule("× $ZW", "", Resolution.NO_BREAK, "\\p{Line_Break=ZWSpace}"));
            rules.add(
                    new RegexRule(
                            "$ZW $SP* ÷",
                            "\\p{Line_Break=ZWSpace} \\p{Line_Break=Space}*",
                            Resolution.BREAK,
                            ""));
            rules.add(new RegexRule("$ZWJ ×", "\\p{Line_Break=ZWJ}", Resolution.NO_BREAK, ""));
            rules.add(
                    new RemapRule(
                            "(?<X>[^$BK $CR $LF $NL $SP $ZW]) ( $CM | $ZWJ )* → ${X}",
                            "(?<X>[^\\p{Line_Break=Mandatory_Break} \\p{Line_Break=Carriage_Return} \\p{Line_Break=Line_Feed} \\p{Line_Break=Next_Line} \\p{Line_Break=Space} \\p{Line_Break=ZWSpace}]) ( [\\p{Line_Break=Combining_Mark} [\\p{Line_Break=Complex_Context}&\\p{gc=Mn}] [\\p{Line_Break=Complex_Context}&\\p{gc=Mc}]] | \\p{Line_Break=ZWJ} )*",
                            "${X}"));
            rules.add(
                    new RemapRule(
                            "( $CM | $ZWJ ) → A",
                            "( [\\p{Line_Break=Combining_Mark} [\\p{Line_Break=Complex_Context}&\\p{gc=Mn}] [\\p{Line_Break=Complex_Context}&\\p{gc=Mc}]] | \\p{Line_Break=ZWJ} )",
                            "A"));
            rules.add(
                    new RegexRule("× $WJ", "", Resolution.NO_BREAK, "\\p{Line_Break=Word_Joiner}"));
            rules.add(
                    new RegexRule("$WJ ×", "\\p{Line_Break=Word_Joiner}", Resolution.NO_BREAK, ""));
            rules.add(new RegexRule("$GL ×", "\\p{Line_Break=Glue}", Resolution.NO_BREAK, ""));
            rules.add(
                    new RegexRule(
                            "[^ $SP $BA $HY $HH] × $GL",
                            "[^ \\p{Line_Break=Space} \\p{Line_Break=Break_After} \\p{Line_Break=Hyphen} \\p{Line_Break=Unambiguous_Hyphen}]",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Glue}"));
            rules.add(
                    new RegexRule("× $EX", "", Resolution.NO_BREAK, "\\p{Line_Break=Exclamation}"));
            rules.add(
                    new RegexRule(
                            "× $CL", "", Resolution.NO_BREAK, "\\p{Line_Break=Close_Punctuation}"));
            rules.add(new RegexRule("× $CP", "", Resolution.NO_BREAK, "\\p{Line_Break=CP}"));
            rules.add(
                    new RegexRule(
                            "× $SY", "", Resolution.NO_BREAK, "\\p{Line_Break=Break_Symbols}"));
            rules.add(
                    new RegexRule(
                            "$OP $SP* ×",
                            "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Space}*",
                            Resolution.NO_BREAK,
                            ""));
            rules.add(
                    new RegexRule(
                            "( $BK | $CR | $LF | $NL | $OP | $QU | $GL | $SP | $ZW | $sot ) $QU_Pi $SP* ×",
                            "( \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} | \\p{Line_Break=Open_Punctuation} | \\p{Line_Break=Quotation} | \\p{Line_Break=Glue} | \\p{Line_Break=Space} | \\p{Line_Break=ZWSpace} | ^ ) [\\p{Line_Break=Quotation} & \\p{gc=Pi}] \\p{Line_Break=Space}*",
                            Resolution.NO_BREAK,
                            ""));
            rules.add(
                    new RegexRule(
                            "× $QU_Pf ( $SP | $GL | $WJ | $CL | $QU | $CP | $EX | $IS | $SY | $BK | $CR | $LF | $NL | $ZW | $eot )",
                            "",
                            Resolution.NO_BREAK,
                            "[\\p{Line_Break=Quotation} & \\p{gc=Pf}] ( \\p{Line_Break=Space} | \\p{Line_Break=Glue} | \\p{Line_Break=Word_Joiner} | \\p{Line_Break=Close_Punctuation} | \\p{Line_Break=Quotation} | \\p{Line_Break=CP} | \\p{Line_Break=Exclamation} | \\p{Line_Break=Infix_Numeric} | \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} | \\p{Line_Break=ZWSpace} | (?!.) )"));
            rules.add(
                    new RegexRule(
                            "$SP ÷ $IS $NU",
                            "\\p{Line_Break=Space}",
                            Resolution.BREAK,
                            "\\p{Line_Break=Infix_Numeric} \\p{Line_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "× $IS", "", Resolution.NO_BREAK, "\\p{Line_Break=Infix_Numeric}"));
            rules.add(
                    new RegexRule(
                            "($CL | $CP) $SP* × $NS",
                            "(\\p{Line_Break=Close_Punctuation} | \\p{Line_Break=CP}) \\p{Line_Break=Space}*",
                            Resolution.NO_BREAK,
                            "[\\p{Line_Break=Nonstarter} \\p{Line_Break=Conditional_Japanese_Starter}]"));
            rules.add(
                    new RegexRule(
                            "$B2 $SP* × $B2",
                            "\\p{Line_Break=Break_Both} \\p{Line_Break=Space}*",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Break_Both}"));
            rules.add(new RegexRule("$SP ÷", "\\p{Line_Break=Space}", Resolution.BREAK, ""));
            rules.add(
                    new RegexRule(
                            "× $QUmPi",
                            "",
                            Resolution.NO_BREAK,
                            "[\\p{Line_Break=Quotation} - \\p{gc=Pi}]"));
            rules.add(
                    new RegexRule(
                            "$QUmPf ×",
                            "[\\p{Line_Break=Quotation} - \\p{gc=Pf}]",
                            Resolution.NO_BREAK,
                            ""));
            rules.add(
                    new RegexRule(
                            "[^$EastAsian] × $QU",
                            "[^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Quotation}"));
            rules.add(
                    new RegexRule(
                            "× $QU ( [^$EastAsian] | $eot )",
                            "",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Quotation} ( [^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]] | (?!.) )"));
            rules.add(
                    new RegexRule(
                            "$QU × [^$EastAsian]",
                            "\\p{Line_Break=Quotation}",
                            Resolution.NO_BREAK,
                            "[^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]"));
            rules.add(
                    new RegexRule(
                            "( [^$EastAsian] | $sot ) $QU ×",
                            "( [^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]] | ^ ) \\p{Line_Break=Quotation}",
                            Resolution.NO_BREAK,
                            ""));
            rules.add(
                    new RegexRule(
                            "÷ $CB", "", Resolution.BREAK, "\\p{Line_Break=Contingent_Break}"));
            rules.add(
                    new RegexRule(
                            "$CB ÷", "\\p{Line_Break=Contingent_Break}", Resolution.BREAK, ""));
            rules.add(
                    new RegexRule(
                            "( $BK | $CR | $LF | $NL | $SP | $ZW | $CB | $GL | $sot ) ( $HY | $HH ) × ( $AL | $HL )",
                            "( \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} | \\p{Line_Break=Space} | \\p{Line_Break=ZWSpace} | \\p{Line_Break=Contingent_Break} | \\p{Line_Break=Glue} | ^ ) ( \\p{Line_Break=Hyphen} | \\p{Line_Break=Unambiguous_Hyphen} )",
                            Resolution.NO_BREAK,
                            "( [\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL} )"));
            rules.add(
                    new RegexRule("× $BA", "", Resolution.NO_BREAK, "\\p{Line_Break=Break_After}"));
            rules.add(
                    new RegexRule(
                            "× $HH",
                            "",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Unambiguous_Hyphen}"));
            rules.add(new RegexRule("× $HY", "", Resolution.NO_BREAK, "\\p{Line_Break=Hyphen}"));
            rules.add(
                    new RegexRule(
                            "× $NS",
                            "",
                            Resolution.NO_BREAK,
                            "[\\p{Line_Break=Nonstarter} \\p{Line_Break=Conditional_Japanese_Starter}]"));
            rules.add(
                    new RegexRule(
                            "$BB ×", "\\p{Line_Break=Break_Before}", Resolution.NO_BREAK, ""));
            rules.add(
                    new RegexRule(
                            "$HL ($HY | $HH) × [^$HL]",
                            "\\p{Line_Break=HL} (\\p{Line_Break=Hyphen} | \\p{Line_Break=Unambiguous_Hyphen})",
                            Resolution.NO_BREAK,
                            "[^\\p{Line_Break=HL}]"));
            rules.add(
                    new RegexRule(
                            "$SY × $HL",
                            "\\p{Line_Break=Break_Symbols}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=HL}"));
            rules.add(
                    new RegexRule("× $IN", "", Resolution.NO_BREAK, "\\p{Line_Break=Inseparable}"));
            rules.add(
                    new RegexRule(
                            "($AL | $HL) × $NU",
                            "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$NU × ($AL | $HL)",
                            "\\p{Line_Break=Numeric}",
                            Resolution.NO_BREAK,
                            "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
            rules.add(
                    new RegexRule(
                            "$PR × ($ID | $EB | $EM)",
                            "\\p{Line_Break=Prefix_Numeric}",
                            Resolution.NO_BREAK,
                            "(\\p{Line_Break=Ideographic} | \\p{Line_Break=E_Base} | \\p{Line_Break=E_Modifier})"));
            rules.add(
                    new RegexRule(
                            "($ID | $EB | $EM) × $PO",
                            "(\\p{Line_Break=Ideographic} | \\p{Line_Break=E_Base} | \\p{Line_Break=E_Modifier})",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Postfix_Numeric}"));
            rules.add(
                    new RegexRule(
                            "($PR | $PO) × ($AL | $HL)",
                            "(\\p{Line_Break=Prefix_Numeric} | \\p{Line_Break=Postfix_Numeric})",
                            Resolution.NO_BREAK,
                            "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
            rules.add(
                    new RegexRule(
                            "($AL | $HL) × ($PR | $PO)",
                            "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})",
                            Resolution.NO_BREAK,
                            "(\\p{Line_Break=Prefix_Numeric} | \\p{Line_Break=Postfix_Numeric})"));
            rules.add(
                    new RegexRule(
                            "$NU ( $SY | $IS )* $CL × $PO",
                            "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=Close_Punctuation}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Postfix_Numeric}"));
            rules.add(
                    new RegexRule(
                            "$NU ( $SY | $IS )* $CP × $PO",
                            "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=CP}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Postfix_Numeric}"));
            rules.add(
                    new RegexRule(
                            "$NU ( $SY | $IS )* $CL × $PR",
                            "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=Close_Punctuation}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Prefix_Numeric}"));
            rules.add(
                    new RegexRule(
                            "$NU ( $SY | $IS )* $CP × $PR",
                            "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=CP}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Prefix_Numeric}"));
            rules.add(
                    new RegexRule(
                            "$NU ( $SY | $IS )* × $PO",
                            "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )*",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Postfix_Numeric}"));
            rules.add(
                    new RegexRule(
                            "$NU ( $SY | $IS )* × $PR",
                            "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )*",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Prefix_Numeric}"));
            rules.add(
                    new RegexRule(
                            "$PO × $OP $NU",
                            "\\p{Line_Break=Postfix_Numeric}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$PO × $OP $IS $NU",
                            "\\p{Line_Break=Postfix_Numeric}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Infix_Numeric} \\p{Line_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$PO × $NU",
                            "\\p{Line_Break=Postfix_Numeric}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$PR × $OP $NU",
                            "\\p{Line_Break=Prefix_Numeric}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$PR × $OP $IS $NU",
                            "\\p{Line_Break=Prefix_Numeric}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Infix_Numeric} \\p{Line_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$PR × $NU",
                            "\\p{Line_Break=Prefix_Numeric}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$HY × $NU",
                            "\\p{Line_Break=Hyphen}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$IS × $NU",
                            "\\p{Line_Break=Infix_Numeric}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$NU ( $SY | $IS )* × $NU",
                            "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )*",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "$JL × $JL | $JV | $H2 | $H3",
                            "\\p{Line_Break=JL}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=JL} | \\p{Line_Break=JV} | \\p{Line_Break=H2} | \\p{Line_Break=H3}"));
            rules.add(
                    new RegexRule(
                            "$JV | $H2 × $JV | $JT",
                            "\\p{Line_Break=JV} | \\p{Line_Break=H2}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=JV} | \\p{Line_Break=JT}"));
            rules.add(
                    new RegexRule(
                            "$JT | $H3 × $JT",
                            "\\p{Line_Break=JT} | \\p{Line_Break=H3}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=JT}"));
            rules.add(
                    new RegexRule(
                            "$JL | $JV | $JT | $H2 | $H3 × $PO",
                            "\\p{Line_Break=JL} | \\p{Line_Break=JV} | \\p{Line_Break=JT} | \\p{Line_Break=H2} | \\p{Line_Break=H3}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Postfix_Numeric}"));
            rules.add(
                    new RegexRule(
                            "$PR × $JL | $JV | $JT | $H2 | $H3",
                            "\\p{Line_Break=Prefix_Numeric}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=JL} | \\p{Line_Break=JV} | \\p{Line_Break=JT} | \\p{Line_Break=H2} | \\p{Line_Break=H3}"));
            rules.add(
                    new RegexRule(
                            "($AL | $HL) × ($AL | $HL)",
                            "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})",
                            Resolution.NO_BREAK,
                            "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
            rules.add(
                    new RegexRule(
                            "$AP × ($AK | $DottedCircle | $AS)",
                            "\\p{Line_Break=Aksara_Prebase}",
                            Resolution.NO_BREAK,
                            "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start})"));
            rules.add(
                    new RegexRule(
                            "($AK | $DottedCircle | $AS) × ($VF | $VI)",
                            "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start})",
                            Resolution.NO_BREAK,
                            "(\\p{Line_Break=Virama_Final} | \\p{Line_Break=Virama})"));
            rules.add(
                    new RegexRule(
                            "($AK | $DottedCircle | $AS) $VI × ($AK | $DottedCircle)",
                            "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start}) \\p{Line_Break=Virama}",
                            Resolution.NO_BREAK,
                            "(\\p{Line_Break=Aksara} | [◌])"));
            rules.add(
                    new RegexRule(
                            "($AK | $DottedCircle | $AS) × ($AK | $DottedCircle | $AS) $VF",
                            "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start})",
                            Resolution.NO_BREAK,
                            "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start}) \\p{Line_Break=Virama_Final}"));
            rules.add(
                    new RegexRule(
                            "$IS × ($AL | $HL)",
                            "\\p{Line_Break=Infix_Numeric}",
                            Resolution.NO_BREAK,
                            "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
            rules.add(
                    new RegexRule(
                            "($AL | $HL | $NU) × $OPmEastAsian",
                            "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL} | \\p{Line_Break=Numeric})",
                            Resolution.NO_BREAK,
                            "[\\p{Line_Break=Open_Punctuation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]"));
            rules.add(
                    new RegexRule(
                            "$CPmEastAsian × ($AL | $HL | $NU)",
                            "[\\p{Line_Break=CP}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]",
                            Resolution.NO_BREAK,
                            "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL} | \\p{Line_Break=Numeric})"));
            rules.add(
                    new RegexRule(
                            "$sot ($RI $RI)* $RI × $RI",
                            "^ (\\p{Line_Break=Regional_Indicator} \\p{Line_Break=Regional_Indicator})* \\p{Line_Break=Regional_Indicator}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Regional_Indicator}"));
            rules.add(
                    new RegexRule(
                            "[^$RI] ($RI $RI)* $RI × $RI",
                            "[^\\p{Line_Break=Regional_Indicator}] (\\p{Line_Break=Regional_Indicator} \\p{Line_Break=Regional_Indicator})* \\p{Line_Break=Regional_Indicator}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=Regional_Indicator}"));
            rules.add(
                    new RegexRule(
                            "$RI ÷ $RI",
                            "\\p{Line_Break=Regional_Indicator}",
                            Resolution.BREAK,
                            "\\p{Line_Break=Regional_Indicator}"));
            rules.add(
                    new RegexRule(
                            "$EB × $EM",
                            "\\p{Line_Break=E_Base}",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=E_Modifier}"));
            rules.add(
                    new RegexRule(
                            "$ExtPictUnassigned × $EM",
                            "[\\p{Extended_Pictographic=True}&\\p{gc=Cn}]",
                            Resolution.NO_BREAK,
                            "\\p{Line_Break=E_Modifier}"));
            // --- End of generated code. ---

            // TODO(egg): This could just as well be part of the rules…
            rules.add(new RegexRule("(ALL ÷ / ÷ ALL)", "", Resolution.BREAK, ""));

            for (final NamedSet part : partition) {
                sets.add(part.set);
                classNames.add(part.name);
            }
        }
    }

    /**
     * Sentence Monkey Test Class.
     *
     * <p>Reference model for sentence segmentation used by the monkey test. The constructor selects
     * {@code UProperty.SENTENCE_BREAK} as the character property, builds a partition of named
     * character classes from the {@code Sentence_Break} property values, and appends the sentence
     * rules to {@code rules} in the order shown.
     *
     * <p>{@code fCharProperty}, {@code rules}, {@code sets} and {@code classNames} are not declared
     * in this class; presumably they are inherited from {@code RBBIMonkeyKind} (confirm there).
     *
     * <p>The region between the "NOLI ME TANGERE" and "End of generated code" markers is produced
     * by GenerateBreakTest.java in the Unicode tools and should be regenerated rather than edited
     * by hand.
     */
    static class RBBISentenceMonkey extends RBBIMonkeyKind {

        RBBISentenceMonkey() {
            fCharProperty = UProperty.SENTENCE_BREAK;
            List<NamedSet> partition = new ArrayList<>();
            rules = new ArrayList<>();

            // These two could be part of the rules.
            // NOTE(review): the labels below say WB1/WB2 although this is the sentence monkey;
            // presumably copied from the word monkey. They are runtime strings, so they are left
            // as they are.
            rules.add(new RegexRule("WB1 sot ÷ Any", "^", Resolution.BREAK, ""));
            // Note that /$/ matches ( BK | CR | LF | NL ) eot, so we use (?!.) instead.
            // The generated rules use the same (?!.).
            rules.add(new RegexRule("WB2 Any ÷ eot", "", Resolution.BREAK, "(?!.)"));

            // --- NOLI ME TANGERE ---
            // Generated by GenerateBreakTest.java in the Unicode tools.
            partition.add(new NamedSet("CR", new UnicodeSet("[\\p{Sentence_Break=CR}]")));
            partition.add(new NamedSet("LF", new UnicodeSet("[\\p{Sentence_Break=LF}]")));
            partition.add(new NamedSet("Extend", new UnicodeSet("[\\p{Sentence_Break=Extend}]")));
            partition.add(new NamedSet("Format", new UnicodeSet("[\\p{Sentence_Break=Format}]")));
            partition.add(new NamedSet("Sep", new UnicodeSet("[\\p{Sentence_Break=Sep}]")));
            partition.add(new NamedSet("Sp", new UnicodeSet("[\\p{Sentence_Break=Sp}]")));
            partition.add(new NamedSet("Lower", new UnicodeSet("[\\p{Sentence_Break=Lower}]")));
            partition.add(new NamedSet("Upper", new UnicodeSet("[\\p{Sentence_Break=Upper}]")));
            partition.add(new NamedSet("OLetter", new UnicodeSet("[\\p{Sentence_Break=OLetter}]")));
            partition.add(new NamedSet("Numeric", new UnicodeSet("[\\p{Sentence_Break=Numeric}]")));
            partition.add(new NamedSet("ATerm", new UnicodeSet("[\\p{Sentence_Break=ATerm}]")));
            partition.add(new NamedSet("STerm", new UnicodeSet("[\\p{Sentence_Break=STerm}]")));
            partition.add(new NamedSet("Close", new UnicodeSet("[\\p{Sentence_Break=Close}]")));
            partition.add(
                    new NamedSet("SContinue", new UnicodeSet("[\\p{Sentence_Break=SContinue}]")));
            partition.add(new NamedSet("XX", new UnicodeSet("[\\p{Sentence_Break=Other}]")));

            rules.add(
                    new RegexRule(
                            "$CR × $LF",
                            "\\p{Sentence_Break=CR}",
                            Resolution.NO_BREAK,
                            "\\p{Sentence_Break=LF}"));
            rules.add(
                    new RegexRule(
                            "$ParaSep ÷",
                            "[\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}]",
                            Resolution.BREAK,
                            ""));
            rules.add(
                    new RemapRule(
                            "(?<X>[^$ParaSep]) ( $Extend | $Format )* → ${X}",
                            "(?<X>[^[\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}]]) ( \\p{Sentence_Break=Extend} | \\p{Sentence_Break=Format} )*",
                            "${X}"));
            rules.add(
                    new RegexRule(
                            "$ATerm × $Numeric",
                            "\\p{Sentence_Break=ATerm}",
                            Resolution.NO_BREAK,
                            "\\p{Sentence_Break=Numeric}"));
            rules.add(
                    new RegexRule(
                            "($Upper | $Lower) $ATerm × $Upper",
                            "(\\p{Sentence_Break=Upper} | \\p{Sentence_Break=Lower}) \\p{Sentence_Break=ATerm}",
                            Resolution.NO_BREAK,
                            "\\p{Sentence_Break=Upper}"));
            rules.add(
                    new RegexRule(
                            "$ATerm $Close* $Sp* × [^ $OLetter $Upper $Lower $ParaSep $SATerm]* $Lower",
                            "\\p{Sentence_Break=ATerm} \\p{Sentence_Break=Close}* \\p{Sentence_Break=Sp}*",
                            Resolution.NO_BREAK,
                            "[^ \\p{Sentence_Break=OLetter} \\p{Sentence_Break=Upper} \\p{Sentence_Break=Lower} [\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}] [\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}]]* \\p{Sentence_Break=Lower}"));
            rules.add(
                    new RegexRule(
                            "$SATerm $Close* $Sp* × ($SContinue | $SATerm)",
                            "[\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}] \\p{Sentence_Break=Close}* \\p{Sentence_Break=Sp}*",
                            Resolution.NO_BREAK,
                            "(\\p{Sentence_Break=SContinue} | [\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}])"));
            rules.add(
                    new RegexRule(
                            "$SATerm $Close* × ( $Close | $Sp | $ParaSep )",
                            "[\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}] \\p{Sentence_Break=Close}*",
                            Resolution.NO_BREAK,
                            "( \\p{Sentence_Break=Close} | \\p{Sentence_Break=Sp} | [\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}] )"));
            rules.add(
                    new RegexRule(
                            "$SATerm $Close* $Sp* × ( $Sp | $ParaSep )",
                            "[\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}] \\p{Sentence_Break=Close}* \\p{Sentence_Break=Sp}*",
                            Resolution.NO_BREAK,
                            "( \\p{Sentence_Break=Sp} | [\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}] )"));
            rules.add(
                    new RegexRule(
                            "$SATerm $Close* $Sp* $ParaSep? ÷",
                            "[\\p{Sentence_Break=STerm} \\p{Sentence_Break=ATerm}] \\p{Sentence_Break=Close}* \\p{Sentence_Break=Sp}* [\\p{Sentence_Break=Sep} \\p{Sentence_Break=CR} \\p{Sentence_Break=LF}]?",
                            Resolution.BREAK,
                            ""));
            rules.add(new RegexRule("× $Any", "", Resolution.NO_BREAK, "."));
            // --- End of generated code. ---

            // Publish the partition: element k of sets and element k of classNames describe the
            // same character class.
            for (final NamedSet part : partition) {
                sets.add(part.set);
                classNames.add(part.name);
            }
        }
    }

    /**
     * Move an index into a string by n code points. Similar to UTF16.moveCodePointOffset, but
     * without the exceptions, which were complicating usage.
     *
     * <p>A well-formed surrogate pair counts as one code point; an unpaired surrogate counts as one
     * code point on its own.
     *
     * @param s a Text string
     * @param pos The starting code unit index into the text string
     * @param amt The number of code points to move by; negative values move backwards and zero
     *     leaves the index untouched.
     * @return The adjusted code unit index, pinned to the range [0, s.length()]. When the input
     *     index is outside of the string, it is pinned if the move points away from the text (a
     *     forward move from beyond the end yields s.length(), a backward move from before the
     *     start yields 0) and is returned unchanged otherwise.
     */
    static int moveIndex32(StringBuffer s, int pos, int amt) {
        final int length = s.length();
        if (amt > 0) {
            if (pos < 0) {
                // There is no code unit to step over; previously this threw from charAt().
                return pos;
            }
            for (int moved = 0; moved < amt; moved++) {
                if (pos >= length) {
                    return length;
                }
                final char unit = s.charAt(pos);
                pos++;
                // Step over the trail half as well when the lead is properly paired.
                if (Character.isHighSurrogate(unit)
                        && pos < length
                        && Character.isLowSurrogate(s.charAt(pos))) {
                    pos++;
                }
            }
        } else if (amt < 0) {
            if (pos > length) {
                // There is no code unit to step back over; previously this threw from charAt().
                return pos;
            }
            for (int moved = 0; moved > amt; moved--) {
                if (pos <= 0) {
                    return 0;
                }
                pos--;
                // Landed on a trail surrogate: back up onto its lead when properly paired.
                if (Character.isLowSurrogate(s.charAt(pos))
                        && pos > 0
                        && Character.isHighSurrogate(s.charAt(pos - 1))) {
                    pos--;
                }
            }
        }
        return pos;
    }

    /**
     * Membership test that never throws for out-of-range values. Handy for loops that use a
     * sentinel such as -1 to signal end of input: the sentinel is simply reported as "not in the
     * set" instead of being passed to UnicodeSet.contains(c).
     *
     * @param s A unicode set
     * @param c A code point value, or any int outside the code point range
     * @return true if c is a valid code point and the set contains it.
     */
    static boolean setContains(UnicodeSet s, int c) {
        final boolean isCodePoint = 0 <= c && c <= UTF16.CODEPOINT_MAX_VALUE;
        return isCodePoint && s.contains(c);
    }

    /**
     * Advance a code unit index past the code point that starts at it.
     *
     * @param s the text being scanned
     * @param i the preceding index, or -1 once the end of input has been signalled
     * @return the index of the next code point (which may equal s.length()), or -1 when i is -1 or
     *     there is no code unit at i to step over
     */
    static int nextCP(StringBuffer s, int i) {
        // -1 is sticky: once end of input has been reported, keep reporting it.
        if (i == -1 || i + 1 > s.length()) {
            return -1;
        }
        // UTF16.charAt also yields a supplementary code point when i sits on the trail half of
        // a pair, so the lead check is needed to step two units only from the start of a pair.
        final boolean atStartOfPair =
                UTF16.charAt(s, i) >= UTF16.SUPPLEMENTARY_MIN_VALUE
                        && UTF16.isLeadSurrogate(s.charAt(i));
        return atStartOfPair ? i + 2 : i + 1;
    }

    /**
     * State of the test's own pseudo-random number generator. Java's built-in Randoms are not used
     * for two reasons: 1. This generator allows obtaining the same sequences as those from the
     * ICU4C monkey test. 2. The seed has to be read and restored in the middle of a long sequence,
     * so that a failing case can be reproduced easily. TODO(egg): We need a better random number
     * generator; ideally the same as in C++, but that may be tricky.
     */
    private static int m_seed = 1;

    // Linear congruential step on m_seed (wrapping 32-bit arithmetic); yields a value in
    // 0..32767 taken from bits 16..30 of the new seed.
    private static int m_rand() {
        final int advanced = 1103515245 * m_seed + 12345;
        m_seed = advanced;
        return (advanced >>> 16) & 0x7fff;
    }

    // Progress glyphs: RunMonkey prints a randomly chosen one every tenth iteration when it is
    // looping forever (numIterations == -1), to show that the test is still making progress.
    private static final String[] monkeys = new String[] {"🙈", "🙉", "🙊", "🐵", "🐒"};

    /**
     * Helper function for formatting error output. Appends a string into a fixed-size field in a
     * StringBuffer: the string is blank-padded on the right if it is shorter than the field, and
     * truncated if it is longer.
     *
     * @param dest buffer to append to
     * @param src text to place in the field
     * @param fieldLen width of the field in chars; zero or a negative width appends nothing
     */
    private static void appendToBuf(StringBuffer dest, String src, int fieldLen) {
        if (fieldLen <= 0) {
            // Callers compute the width by subtraction, so it can go negative; truncating to a
            // negative length would throw StringIndexOutOfBoundsException.
            return;
        }
        final int srcLen = src.length();
        if (srcLen >= fieldLen) {
            dest.append(src, 0, fieldLen);
            return;
        }
        dest.append(src);
        for (int column = srcLen; column < fieldLen; column++) {
            dest.append(' ');
        }
    }

    /**
     * Helper function for formatting error output. Appends a code point as "\\uxxxx" (values below
     * 0x10000) or "\\Uxxxxxxxx" (everything else), using lowercase hex digits, then blank-pads the
     * result to fieldLen chars. The output contains a single backslash; it is doubled in this
     * comment only because Java would otherwise try to parse it as a Unicode escape.
     *
     * @param dest buffer to append to
     * @param c the code point to display
     * @param fieldLen total width of the field; when it is narrower than the escape itself, the
     *     escape is still written in full and no padding is added
     */
    @SuppressWarnings("unused")
    private static void appendCharToBuf(StringBuffer dest, int c, int fieldLen) {
        final boolean fourDigits = c < 0x10000;
        final int digitCount = fourDigits ? 4 : 8;
        dest.append(fourDigits ? "\\u" : "\\U");
        for (int shift = 4 * (digitCount - 1); shift >= 0; shift -= 4) {
            dest.append(Character.forDigit((c >> shift) & 0xf, 16));
        }
        // Two chars of prefix plus the digits have been written. Clamp the remaining width so
        // that a field narrower than the escape cannot hand a negative length to appendToBuf.
        appendToBuf(dest, " ", Math.max(0, fieldLen - (2 + digitCount)));
    }

    /**
     * Run a RBBI monkey test. Common routine, for all break iterator types.
     *
     * <p>Each iteration builds a random test string from the character classes of {@code mk},
     * computes the expected boundaries with {@code mk}, collects the boundaries reported by {@code
     * bi} through next(), previous(), isBoundary(), following() and preceding(), and reports the
     * first position at which any of them disagrees with the expected result.
     *
     * <p>This method overwrites the shared random number seed with {@code seed}.
     *
     * @param bi the break iterator to use
     * @param mk MonkeyKind, abstraction for obtaining expected results
     * @param name Name of test (char, word, etc.) for use in error messages
     * @param seed Seed for starting random number generator (parameter from user)
     * @param numIterations number of random strings to test; -1 runs forever
     */
    void RunMonkey(BreakIterator bi, RBBIMonkeyKind mk, String name, int seed, int numIterations) {
        int TESTSTRINGLEN = 500;
        StringBuffer testText = new StringBuffer();
        int numCharClasses;
        List<UnicodeSet> chClasses;
        // Indexed by code unit position; sized for a test string made only of supplementary
        // characters (two code units each), plus one for the end position.
        boolean[] expectedBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
        boolean[] forwardBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
        boolean[] reverseBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
        boolean[] isBoundaryBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
        boolean[] followingBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
        boolean[] precedingBreaks = new boolean[TESTSTRINGLEN * 2 + 1];
        int i;
        int loopCount = 0;
        int errorCount = 0;
        boolean printTestData = false;
        boolean printBreaksFromBI = false;

        m_seed = seed;

        numCharClasses = mk.charClasses().size();
        chClasses = mk.charClasses();

        // Verify that the character classes all have at least one member.
        for (i = 0; i < numCharClasses; i++) {
            UnicodeSet s = chClasses.get(i);
            if (s == null || s.size() == 0) {
                errln("Character Class " + i + " is null or of zero size.");
                return;
            }
        }

        // --------------------------------------------------------------------------------------------
        //
        //  Debugging settings.  Comment out everything in the following block for normal operation
        //
        // --------------------------------------------------------------------------------------------
        // numIterations = -1;
        // numIterations = 10000;   // Same as exhaustive.
        // RuleBasedBreakIterator_New.fTrace = true;
        // m_seed = 859056465;
        // TESTSTRINGLEN = 50;
        // printTestData = true;
        // printBreaksFromBI = true;
        // ((RuleBasedBreakIterator_New)bi).dump();

        // --------------------------------------------------------------------------------------------
        //
        //  End of Debugging settings.
        //
        // --------------------------------------------------------------------------------------------

        // For minimizing width of class name output.
        int classNameSize = mk.maxClassNameSize();
        // The runtime version cannot change while the test runs, so look it up only once.
        final boolean java8OrOlder = System.getProperty("java.version").startsWith("1.");
        while (loopCount < numIterations || numIterations == -1) {
            if (numIterations == -1 && loopCount % 10 == 0) {
                // If test is running in an infinite loop, display a periodic tic so
                //   we can tell that it is making progress.
                System.out.print(monkeys[m_rand() % monkeys.length]);
                if (loopCount % 1_000_000 == 0) {
                    System.out.println(
                            "\nTested "
                                    + loopCount / 1_000_000
                                    + " million random strings with "
                                    + errorCount
                                    + " errors");
                }
            }
            // Save current random number seed, so that we can recreate the random numbers
            //   for this loop iteration in event of an error.
            seed = m_seed;

            testText.setLength(0);
            // Populate a test string with data.
            if (printTestData) {
                System.out.println("Test Data string ...");
            }
            for (i = 0; i < TESTSTRINGLEN; i++) {
                int aClassNum = m_rand() % numCharClasses;
                UnicodeSet classSet = chClasses.get(aClassNum);
                int charIdx = m_rand() % classSet.size();
                int c = classSet.charAt(charIdx);
                if (c < 0) { // TODO:  deal with sets containing strings.
                    errln("c < 0");
                }
                if (mk.getDictionarySet().contains(c)) {
                    continue;
                }
                // Do not emit surrogates on Java 8, as the behaviour of regular expressions that
                // match surrogates differs there.
                if (java8OrOlder
                        && Character.isBmpCodePoint(c)
                        && Character.isSurrogate((char) c)) {
                    continue;
                }
                // Do not assemble a supplementary character from randomly generated separate
                // surrogates.
                //   (It could be a dictionary character)
                if (c < 0x10000
                        && Character.isLowSurrogate((char) c)
                        && testText.length() > 0
                        && Character.isHighSurrogate(testText.charAt(testText.length() - 1))) {
                    continue;
                }
                testText.appendCodePoint(c);
                if (printTestData) {
                    System.out.print(Integer.toHexString(c) + " ");
                }
            }
            if (printTestData) {
                System.out.println();
            }

            Arrays.fill(expectedBreaks, false);
            Arrays.fill(forwardBreaks, false);
            Arrays.fill(reverseBreaks, false);
            Arrays.fill(isBoundaryBreaks, false);
            Arrays.fill(followingBreaks, false);
            Arrays.fill(precedingBreaks, false);

            // Calculate the expected results for this test string and reset applied rules.
            mk.setText(testText);
            expectedBreaks[0] = true;
            int breakPos = 0;
            int lastBreakPos = -1;
            for (; ; ) {
                lastBreakPos = breakPos;
                breakPos = mk.next(breakPos);
                if (breakPos == -1) {
                    break;
                }
                if (breakPos > testText.length()) {
                    errln("breakPos > testText.length()");
                }
                if (lastBreakPos >= breakPos) {
                    errln("Next() not increasing.");
                    // break;
                }
                expectedBreaks[breakPos] = true;
            }

            // Find the break positions using forward iteration
            if (printBreaksFromBI) {
                System.out.println("Breaks from BI...");
            }
            bi.setText(testText.toString());
            for (i = bi.first(); i != BreakIterator.DONE; i = bi.next()) {
                if (i < 0 || i > testText.length()) {
                    errln(
                            name
                                    + " break monkey test: Out of range value returned by breakIterator::next()");
                    break;
                }
                if (printBreaksFromBI) {
                    System.out.print(Integer.toHexString(i) + " ");
                }
                forwardBreaks[i] = true;
            }
            if (printBreaksFromBI) {
                System.out.println();
            }

            // Find the break positions using reverse iteration
            for (i = bi.last(); i != BreakIterator.DONE; i = bi.previous()) {
                if (i < 0 || i > testText.length()) {
                    errln(
                            name
                                    + " break monkey test: Out of range value returned by breakIterator::previous()");
                    break;
                }
                reverseBreaks[i] = true;
            }

            // Find the break positions using isBoundary() tests.
            for (i = 0; i <= testText.length(); i++) {
                isBoundaryBreaks[i] = bi.isBoundary(i);
            }

            // Find the break positions using the following() function.
            lastBreakPos = 0;
            followingBreaks[0] = true;
            for (i = 0; i < testText.length(); i++) {
                breakPos = bi.following(i);
                if (breakPos <= i
                        || breakPos < lastBreakPos
                        || breakPos > testText.length()
                        || breakPos > lastBreakPos && lastBreakPos > i) {
                    errln(
                            name
                                    + " break monkey test: "
                                    + "Out of range value returned by BreakIterator::following().\n"
                                    + "index="
                                    + i
                                    + " following returned="
                                    + breakPos
                                    + " lastBreak="
                                    + lastBreakPos);
                    // Forces an error. It has to be recorded in followingBreaks: later passes of
                    // this loop only mark positions beyond i, whereas an entry in precedingBreaks
                    // could be overwritten by the preceding() loop below.
                    followingBreaks[i] = !expectedBreaks[i];
                } else {
                    followingBreaks[breakPos] = true;
                    lastBreakPos = breakPos;
                }
            }

            // Find the break positions using the preceding() function.
            lastBreakPos = testText.length();
            precedingBreaks[testText.length()] = true;
            for (i = testText.length(); i > 0; i--) {
                breakPos = bi.preceding(i);
                if (breakPos >= i
                        || breakPos > lastBreakPos
                        || breakPos < 0
                        || breakPos < lastBreakPos && lastBreakPos < i) {
                    errln(
                            name
                                    + " break monkey test: "
                                    + "Out of range value returned by BreakIterator::preceding().\n"
                                    + "index="
                                    + i
                                    + " preceding returned="
                                    + breakPos
                                    + " lastBreak="
                                    + lastBreakPos);
                    precedingBreaks[i] = !expectedBreaks[i]; // Forces an error.
                } else {
                    precedingBreaks[breakPos] = true;
                    lastBreakPos = breakPos;
                }
            }

            // Compare the expected and actual results.
            for (i = 0; i <= testText.length(); i++) {
                String errorType = null;
                boolean[] currentBreakData = null;
                if (forwardBreaks[i] != expectedBreaks[i]) {
                    errorType = "next()";
                    currentBreakData = forwardBreaks;
                } else if (reverseBreaks[i] != forwardBreaks[i]) {
                    errorType = "previous()";
                    currentBreakData = reverseBreaks;
                } else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
                    errorType = "isBoundary()";
                    currentBreakData = isBoundaryBreaks;
                } else if (followingBreaks[i] != expectedBreaks[i]) {
                    errorType = "following()";
                    currentBreakData = followingBreaks;
                } else if (precedingBreaks[i] != expectedBreaks[i]) {
                    errorType = "preceding()";
                    currentBreakData = precedingBreaks;
                }

                if (errorType != null) {
                    ++errorCount;
                    // Format a range of the test text that includes the failure as
                    //  a data item that can be included in the rbbi test data file.

                    // Start of the range is the last point where expected and actual results
                    //   both agreed that there was a break position.
                    int startContext = i;
                    int count = 0;
                    for (; ; ) {
                        if (startContext == 0) {
                            break;
                        }
                        startContext--;
                        if (expectedBreaks[startContext]) {
                            if (count == 2) break;
                            count++;
                        }
                    }

                    // End of range is two expected breaks past the start position.
                    int endContext = i + 1;
                    int ci;
                    for (ci = 0; ci < 2; ci++) { // Number of items to include in error text.
                        for (; ; ) {
                            if (endContext >= testText.length()) {
                                break;
                            }
                            if (expectedBreaks[endContext - 1]) {
                                if (count == 0) break;
                                count--;
                            }
                            endContext++;
                        }
                    }

                    // Formatting of each line includes:
                    //   character code
                    //   reference break: '|' -> a break, '.' -> no break
                    //   actual break:    '|' -> a break, '.' -> no break
                    //   (name of character class)
                    //   Unicode name of character
                    //   '--→' indicates location of the difference.

                    StringBuilder buffer = new StringBuilder();
                    buffer.append("\n")
                            .append(
                                    (expectedBreaks[i]
                                            ? "Break expected but not found."
                                            : "Break found but not expected."))
                            .append(
                                    String.format(
                                            " at index %d. Parameters to reproduce: -Dtest=RBBITestMonkey#Test%sMonkey -Dseed=%d -Dloop=1\n",
                                            i, name, seed));

                    int c; // Char from test data
                    for (ci = startContext;
                            ci <= endContext && ci != -1;
                            ci = nextCP(testText, ci)) {
                        if (ci == testText.length()) {
                            break; // TODO(egg): The index dance above seems wrong.
                        }
                        c = testText.codePointAt(ci);
                        buffer.append((ci == i) ? " --→" : "    ")
                                .append(String.format(" %3d : ", ci))
                                .append(!expectedBreaks[ci] ? " . " : " | ") // Reference break
                                .append(!currentBreakData[ci] ? " . " : " | "); // Actual break

                        // BMP or SMP character in hex
                        if (c >= 0x10000) {
                            buffer.append("\\U").append(String.format("%08x", c));
                        } else {
                            buffer.append("    \\u").append(String.format("%04x", c));
                        }

                        buffer.append(
                                        String.format(
                                                String.format(" %%-%ds", classNameSize),
                                                mk.classNameFromCodepoint(c)))
                                .append(String.format(" %-40s", mk.getAppliedRule(ci)))
                                .append(String.format(" %-40s\n", UCharacter.getExtendedName(c)));

                        if (ci >= endContext) {
                            break;
                        }
                    }
                    errln(buffer.toString());

                    // Only the first mismatch of each test string is reported.
                    break;
                }
            }

            loopCount++;
        }
    }

    // Test parameters are passed on the command line, or
    // via the Eclipse Run Configuration settings, arguments tab, VM parameters.
    // For example,
    //      -ea -Dseed=554654 -Dloop=1

    /** Monkey test of the default character (grapheme cluster) break iterator. */
    @Test
    public void TestCharMonkey() {
        // Defaults depend on quick mode; both can be overridden with -Dloop and -Dseed.
        final int iterations = getIntProperty("loop", isQuick() ? 500 : 10000);
        final int initialSeed = getIntProperty("seed", 1);

        final RBBICharMonkey reference = new RBBICharMonkey();
        final BreakIterator underTest = BreakIterator.getCharacterInstance(Locale.US);
        RunMonkey(underTest, reference, "Char", initialSeed, iterations);
    }

    /** Monkey test of the default word break iterator. */
    @Test
    public void TestWordMonkey() {
        // Defaults depend on quick mode; both can be overridden with -Dloop and -Dseed.
        final int iterations = getIntProperty("loop", isQuick() ? 500 : 10000);
        final int initialSeed = getIntProperty("seed", 1);

        logln("Word Break Monkey Test");
        final RBBIWordMonkey reference = new RBBIWordMonkey();
        final BreakIterator underTest = BreakIterator.getWordInstance(Locale.US);
        RunMonkey(underTest, reference, "Word", initialSeed, iterations);
    }

    /**
     * Monkey test of the default line break iterator.
     *
     * <p>An IllegalArgumentException complaining about code point -1 is reported as a probable
     * programming error in the reference implementation; any other IllegalArgumentException is
     * rethrown untouched.
     */
    @Test
    public void TestLineMonkey() {
        int loopCount = getIntProperty("loop", isQuick() ? 500 : 10000);
        int seed = getIntProperty("seed", 1);

        logln("Line Break Monkey Test");
        RBBILineMonkey m = new RBBILineMonkey();
        BreakIterator bi = BreakIterator.getLineInstance(Locale.US);
        try {
            RunMonkey(bi, m, "Line", seed, loopCount);
        } catch (IllegalArgumentException e) {
            // Compare constant-first: getMessage() may be null, and a NullPointerException
            // raised here would hide the exception that actually occurred.
            if (!"Invalid code point U+-000001".equals(e.getMessage())) {
                throw e;
            }
            // Looks like you used class UnicodeSet instead of class XUnicodeSet
            // (note the leading 'X').
            // See the comment before the definition of class XUnicodeSet.
            errln("Probable program error: use XUnicodeSet in RBBILineMonkey code");
        }
    }

    /** Monkey test of the default sentence break iterator. */
    @Test
    public void TestSentMonkey() {
        // Defaults depend on quick mode; both can be overridden with -Dloop and -Dseed.
        final int iterations = getIntProperty("loop", isQuick() ? 500 : 3000);
        final int initialSeed = getIntProperty("seed", 1);

        logln("Sentence Break Monkey Test");
        final RBBISentenceMonkey reference = new RBBISentenceMonkey();
        final BreakIterator underTest = BreakIterator.getSentenceInstance(Locale.US);
        RunMonkey(underTest, reference, "Sent", initialSeed, iterations);
    }

    //
    //  Round-trip monkey tests.
    //  Verify that break iterators created from the rule source from the default
    //    break iterators still pass the monkey test for the iterator type.
    //
    //  This is a major test for the Rule Compiler.  The default break iterators are built
    //  from pre-compiled binary rule data that was created using ICU4C; these
    //  round-trip rule recompile tests verify that the Java rule compiler can
    //  rebuild break iterators from the original source rules.
    //
    /**
     * Round-trip monkey test for character breaks: the iterator under test is rebuilt from the
     * rule source of the default character break iterator.
     */
    @Test
    public void TestRTCharMonkey() {
        // Defaults depend on quick mode; both can be overridden with -Dloop and -Dseed.
        final int iterations = getIntProperty("loop", isQuick() ? 200 : 2000);
        final int initialSeed = getIntProperty("seed", 1);

        final RBBICharMonkey reference = new RBBICharMonkey();
        final String ruleSource = BreakIterator.getCharacterInstance(Locale.US).toString();
        final BreakIterator recompiled = new RuleBasedBreakIterator(ruleSource);
        RunMonkey(recompiled, reference, "RTChar", initialSeed, iterations);
    }

    // Round-trip word monkey test: takes the string form of the default word
    // break iterator for Locale.US, builds a new RuleBasedBreakIterator from it,
    // and runs the word monkey test against the rebuilt iterator.
    @Test
    public void TestRTWordMonkey() {
        final int defaultLoops = isQuick() ? 200 : 2000;
        final int iterations = getIntProperty("loop", defaultLoops);
        final int randomSeed = getIntProperty("seed", 1);

        logln("Word Break Monkey Test");
        final RBBIWordMonkey referenceMonkey = new RBBIWordMonkey();
        final BreakIterator defaultIterator = BreakIterator.getWordInstance(Locale.US);
        // toString() on the default iterator yields the text handed to the rule compiler.
        final String ruleSource = defaultIterator.toString();
        final BreakIterator recompiledIterator = new RuleBasedBreakIterator(ruleSource);
        RunMonkey(recompiledIterator, referenceMonkey, "RTWord", randomSeed, iterations);
    }

    // Round-trip line monkey test: takes the string form of the default line
    // break iterator for Locale.US, builds a new RuleBasedBreakIterator from it,
    // and runs the line monkey test against the rebuilt iterator.
    //
    // An IllegalArgumentException whose message is exactly
    // "Invalid code point U+-000001" is reported as a probable misuse of
    // UnicodeSet in the RBBILineMonkey code; any other IllegalArgumentException
    // is rethrown unchanged.
    @Test
    public void TestRTLineMonkey() {
        int loopCount = getIntProperty("loop", isQuick() ? 200 : 2000);
        int seed = getIntProperty("seed", 1);

        logln("Line Break Monkey Test");
        RBBILineMonkey m = new RBBILineMonkey();
        BreakIterator bi = BreakIterator.getLineInstance(Locale.US);
        String rules = bi.toString();
        BreakIterator rtbi = new RuleBasedBreakIterator(rules);
        try {
            RunMonkey(rtbi, m, "RTLine", seed, loopCount);
        } catch (IllegalArgumentException e) {
            // Compare constant-first: getMessage() may be null, and an NPE thrown
            // here would hide the original exception instead of rethrowing it.
            if ("Invalid code point U+-000001".equals(e.getMessage())) {
                // Looks like you used class UnicodeSet instead of class XUnicodeSet
                // (note the leading 'X').
                // See the comment before the definition of class XUnicodeSet.
                errln("Probable program error: use XUnicodeSet in RBBILineMonkey code");
            } else {
                throw e;
            }
        }
    }

    // Round-trip sentence monkey test: takes the string form of the default
    // sentence break iterator for Locale.US, builds a new RuleBasedBreakIterator
    // from it, and runs the sentence monkey test against the rebuilt iterator.
    @Test
    public void TestRTSentMonkey() {
        final int defaultLoops = isQuick() ? 200 : 1000;
        final int iterations = getIntProperty("loop", defaultLoops);
        final int randomSeed = getIntProperty("seed", 1);

        logln("Sentence Break Monkey Test");
        final RBBISentenceMonkey referenceMonkey = new RBBISentenceMonkey();
        final BreakIterator defaultIterator = BreakIterator.getSentenceInstance(Locale.US);
        // toString() on the default iterator yields the text handed to the rule compiler.
        final String ruleSource = defaultIterator.toString();
        final BreakIterator recompiledIterator = new RuleBasedBreakIterator(ruleSource);
        RunMonkey(recompiledIterator, referenceMonkey, "RTSent", randomSeed, iterations);
    }
}
